Load and Prepare Data

poly <- readRDS(paste0(clean_path, "population_poly.rds"))

poly <- poly %>% select(research_case_id, bp, hr, temperature, inr, iss, iss_cat,
                        invasive, gcs, gcs_cat, sex, age, age_cat, bleeding, fracture,
                        concussion, brain_edema, brain_compression, unconsciousness)

# What to do with NA's?
# For heart rate and blood pressure must be documentation mistakes (always measured), so I exclude them in the analysis

poly <- poly %>% drop_na(bp, hr)

# Also, a HR of 865 seems hardly normal, take it out
poly <- poly %>% filter(hr < 250)

# For temp and inr missing probably indicates no problem and thus not measured. I will try to fit a model where I remove the NA's and another model where I convert the variables to ordered factors and classify NA's as in normal range (ASSUMPTION!).


# Create long-format dataset for BP and HR (hemorrhage)
poly_hemo <- melt(poly, 
                  id.vars = c("research_case_id", "gcs_cat", "iss_cat", "invasive", 
                              "sex", "age_cat", "bleeding", "fracture", "concussion", 
                              "brain_edema", "brain_compression", "unconsciousness"),
                  measure.vars = c("bp", "hr"),
                  variable.name = "vital_sign",
                  value.name = "value")

# Create combined GCS and vital sign variable
poly_hemo[, gcs_vital := paste(gcs_cat, vital_sign, sep = "_")]

Exploratory Data Analysis

# Distribution of ordered factors
table(poly$gcs_cat)
## 
##  0.unknown     1.mild 2.moderate   3.severe 
##       1530       4801        215        529
table(poly$iss_cat)
## 
##   1.1-8  2.9-15 3.16-24   4.25+ 
##    3665    2330     891     189
table(poly$age_cat)
## 
##   1.<30 2.30-39 3.40-49 4.50-59 5.60-69 6.70-78   7.79+ 
##    1196     956     843     935     808     866    1471
# Summary statistics of key variables
summary(poly[, .(bp, hr, temperature, inr)])
##        bp               hr          temperature         inr       
##  Min.   :  0.00   Min.   :  0.00   Min.   :23.10   Min.   : 0.80  
##  1st Qu.: 85.00   1st Qu.: 70.00   1st Qu.:36.40   1st Qu.: 1.10  
##  Median : 95.00   Median : 80.00   Median :36.70   Median : 1.10  
##  Mean   : 95.81   Mean   : 81.21   Mean   :36.67   Mean   : 1.22  
##  3rd Qu.:106.00   3rd Qu.: 90.00   3rd Qu.:37.10   3rd Qu.: 1.20  
##  Max.   :185.67   Max.   :183.00   Max.   :43.90   Max.   :10.00  
##                                    NA's   :618     NA's   :380
# Check missing values in key variables
missing_values <- colSums(is.na(poly))
print(missing_values)
##  research_case_id                bp                hr       temperature 
##                 0                 0                 0               618 
##               inr               iss           iss_cat          invasive 
##               380                 0                 0                 0 
##               gcs           gcs_cat               sex               age 
##              1530                 0                 0                 0 
##           age_cat          bleeding          fracture        concussion 
##                 0                 0                 0                 0 
##       brain_edema brain_compression   unconsciousness 
##                 0                 0                 0
# Distribution of key outcome variables
par(mfrow=c(2,2))
hist(poly$bp, main="Blood Pressure Distribution", xlab="BP")
hist(poly$hr, main="Heart Rate Distribution", xlab="HR")
hist(poly$temperature, main="Temperature Distribution", xlab="Temperature")
hist(poly$inr, main="INR Distribution", xlab="INR")

par(mfrow=c(1,1))

# Examine relationships between ordered factors and outcomes
boxplot(bp ~ gcs_cat, data = poly, 
        main = "Blood Pressure by GCS Category", xlab = "GCS Category", ylab = "BP")

boxplot(hr ~ gcs_cat, data = poly,
        main = "Heart Rate by GCS Category", xlab = "GCS Category", ylab = "HR")

boxplot(temperature ~ gcs_cat, data = poly,
        main = "Temperature by GCS Category", xlab = "GCS Category", ylab = "Temperature")

boxplot(inr ~ gcs_cat, data = poly,
        main = "INR by GCS Category", xlab = "GCS Category", ylab = "INR")

Model 1: Blood Pressure and Heart Rate (Hemorrhage)

model1 <- lmer(value ~ vital_sign * gcs_cat + iss_cat + invasive + sex + age_cat + 
               bleeding + fracture + concussion + brain_edema + brain_compression +
               unconsciousness + (1|research_case_id), data = poly_hemo)

# Alternative model with sex interaction
model1_sex <- lmer(value ~ vital_sign * gcs_cat + vital_sign * sex + iss_cat + invasive +
                   age_cat + bleeding + fracture + concussion + brain_edema +
                   brain_compression + unconsciousness + (1|research_case_id), 
                   data = poly_hemo)

# Model summaries
summary(model1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: value ~ vital_sign * gcs_cat + iss_cat + invasive + sex + age_cat +  
##     bleeding + fracture + concussion + brain_edema + brain_compression +  
##     unconsciousness + (1 | research_case_id)
##    Data: poly_hemo
## 
## REML criterion at convergence: 119549.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8052 -0.6345 -0.0586  0.5535  5.1260 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  research_case_id (Intercept)  29.81    5.46   
##  Residual                     245.89   15.68   
## Number of obs: 14150, groups:  research_case_id, 7075
## 
## Fixed effects:
##                          Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)             9.297e+01  5.305e-01  1.041e+04 175.256  < 2e-16 ***
## vital_signhr           -9.910e+00  4.770e-01  7.071e+03 -20.774  < 2e-16 ***
## gcs_cat.L              -7.392e+00  6.831e-01  1.317e+04 -10.821  < 2e-16 ***
## gcs_cat.Q              -3.756e+00  7.297e-01  1.383e+04  -5.148 2.67e-07 ***
## gcs_cat.C              -6.839e-01  8.037e-01  1.392e+04  -0.851   0.3948    
## iss_cat.L              -5.756e-01  6.647e-01  7.054e+03  -0.866   0.3865    
## iss_cat.Q              -1.334e+00  5.357e-01  7.054e+03  -2.489   0.0128 *  
## iss_cat.C               1.550e-01  3.884e-01  7.054e+03   0.399   0.6898    
## invasive1               6.818e-01  7.635e-01  7.054e+03   0.893   0.3719    
## sexm                    7.219e-01  3.183e-01  7.054e+03   2.268   0.0233 *  
## age_cat.L               3.341e+00  3.881e-01  7.054e+03   8.609  < 2e-16 ***
## age_cat.Q               9.456e-02  3.816e-01  7.054e+03   0.248   0.8043    
## age_cat.C               5.400e-01  3.961e-01  7.054e+03   1.363   0.1728    
## age_cat^4               5.121e-01  4.023e-01  7.054e+03   1.273   0.2031    
## age_cat^5               8.605e-01  4.210e-01  7.054e+03   2.044   0.0410 *  
## age_cat^6              -3.532e-01  4.177e-01  7.054e+03  -0.846   0.3978    
## bleeding                2.505e-01  5.345e-01  7.054e+03   0.469   0.6394    
## fracture               -3.396e-01  4.958e-01  7.054e+03  -0.685   0.4934    
## concussion              2.469e-01  4.006e-01  7.054e+03   0.616   0.5377    
## brain_edema             2.827e+00  2.248e+00  7.054e+03   1.258   0.2085    
## brain_compression       1.964e+00  2.983e+00  7.054e+03   0.658   0.5103    
## unconsciousness        -1.796e+00  4.120e-01  7.054e+03  -4.360 1.32e-05 ***
## vital_signhr:gcs_cat.L  1.598e+01  8.261e-01  7.071e+03  19.346  < 2e-16 ***
## vital_signhr:gcs_cat.Q  5.461e+00  9.540e-01  7.071e+03   5.724 1.08e-08 ***
## vital_signhr:gcs_cat.C  7.024e-01  1.067e+00  7.071e+03   0.658   0.5103    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model1_sex)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## value ~ vital_sign * gcs_cat + vital_sign * sex + iss_cat + invasive +  
##     age_cat + bleeding + fracture + concussion + brain_edema +  
##     brain_compression + unconsciousness + (1 | research_case_id)
##    Data: poly_hemo
## 
## REML criterion at convergence: 119548.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.8098 -0.6339 -0.0583  0.5553  5.1305 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  research_case_id (Intercept)  29.8     5.459  
##  Residual                     245.9    15.681  
## Number of obs: 14150, groups:  research_case_id, 7075
## 
## Fixed effects:
##                          Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)             9.311e+01  5.571e-01  1.176e+04 167.142  < 2e-16 ***
## vital_signhr           -1.019e+01  5.859e-01  7.070e+03 -17.398  < 2e-16 ***
## gcs_cat.L              -7.370e+00  6.836e-01  1.318e+04 -10.782  < 2e-16 ***
## gcs_cat.Q              -3.760e+00  7.297e-01  1.383e+04  -5.153 2.59e-07 ***
## gcs_cat.C              -6.764e-01  8.038e-01  1.392e+04  -0.842   0.4000    
## sexm                    4.950e-01  4.189e-01  1.379e+04   1.182   0.2374    
## iss_cat.L              -5.756e-01  6.647e-01  7.054e+03  -0.866   0.3865    
## iss_cat.Q              -1.334e+00  5.357e-01  7.054e+03  -2.489   0.0128 *  
## iss_cat.C               1.550e-01  3.884e-01  7.054e+03   0.399   0.6898    
## invasive1               6.818e-01  7.635e-01  7.054e+03   0.893   0.3719    
## age_cat.L               3.341e+00  3.881e-01  7.054e+03   8.609  < 2e-16 ***
## age_cat.Q               9.456e-02  3.816e-01  7.054e+03   0.248   0.8043    
## age_cat.C               5.400e-01  3.961e-01  7.054e+03   1.363   0.1728    
## age_cat^4               5.121e-01  4.023e-01  7.054e+03   1.273   0.2031    
## age_cat^5               8.605e-01  4.210e-01  7.054e+03   2.044   0.0410 *  
## age_cat^6              -3.532e-01  4.177e-01  7.054e+03  -0.846   0.3978    
## bleeding                2.505e-01  5.345e-01  7.054e+03   0.469   0.6394    
## fracture               -3.396e-01  4.958e-01  7.054e+03  -0.685   0.4934    
## concussion              2.469e-01  4.006e-01  7.054e+03   0.616   0.5377    
## brain_edema             2.827e+00  2.248e+00  7.054e+03   1.258   0.2085    
## brain_compression       1.964e+00  2.983e+00  7.054e+03   0.658   0.5103    
## unconsciousness        -1.796e+00  4.120e-01  7.054e+03  -4.360 1.32e-05 ***
## vital_signhr:gcs_cat.L  1.594e+01  8.277e-01  7.070e+03  19.256  < 2e-16 ***
## vital_signhr:gcs_cat.Q  5.469e+00  9.541e-01  7.070e+03   5.732 1.03e-08 ***
## vital_signhr:gcs_cat.C  6.875e-01  1.067e+00  7.070e+03   0.644   0.5193    
## vital_signhr:sexm       4.538e-01  5.448e-01  7.070e+03   0.833   0.4048    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compare models
anova(model1, model1_sex)
## Data: poly_hemo
## Models:
## model1: value ~ vital_sign * gcs_cat + iss_cat + invasive + sex + age_cat + bleeding + fracture + concussion + brain_edema + brain_compression + unconsciousness + (1 | research_case_id)
## model1_sex: value ~ vital_sign * gcs_cat + vital_sign * sex + iss_cat + invasive + age_cat + bleeding + fracture + concussion + brain_edema + brain_compression + unconsciousness + (1 | research_case_id)
##            npar    AIC    BIC logLik deviance  Chisq Df Pr(>Chisq)
## model1       27 119615 119819 -59780   119561                     
## model1_sex   28 119616 119828 -59780   119560 0.6944  1     0.4047

Note that following our discussion last time I also fit the model with an interaction with sex. However, neither the interaction the main effect were significant and it resulted in a higher AIC score. In the model without interaction, the main effect is significant, indicating higher bp and hr for men.

We can see a significant positive linear effect of age_cat.

Let’s look at some diagnostics plots

plot(model1) # Looks ok

qqnorm(resid(model1), main="Q-Q Plot of Residuals")
qqline(resid(model1))

# Half-normal plot of residuals (approximation using car package)
car::qqPlot(resid(model1), distribution="norm", envelope=0.95,
           main="Half-Normal Plot of Residuals", ylab="Residuals")

## [1] 3489 4990
# Predictions
poly_hemo$predicted <- predict(model1, newdata = poly_hemo, allow.new.levels = TRUE, re.form = NULL)  # Include random effects

# Plot predicted vs observed
ggplot(poly_hemo, aes(x=value, y=predicted)) +
  geom_point(alpha=0.5) +
  geom_abline(intercept=0, slope=1, color="red") +
  facet_wrap(~vital_sign, scales="fixed") +
  labs(title="Predicted vs Observed Values", x="Observed", y="Predicted") +
  theme_minimal()

# Examine interaction effects with ordered factors
ggplot(poly_hemo, aes(x=gcs_cat, y=value, fill=vital_sign)) +
  geom_boxplot() +
  facet_wrap(~vital_sign, scales="free_y") +
  labs(title="BP and HR by GCS Category", x="GCS Category", y="Value") +
  theme_minimal()

The qq plot looks like there is a bit of skewness, we could try to fit it with a log trafo.

model1_log <- lmer(log(value + 0.0001) ~ vital_sign * gcs_cat + iss_cat + invasive + sex +
               age_cat + bleeding + fracture + concussion + brain_edema +
               brain_compression + unconsciousness + (1|research_case_id), 
               data = poly_hemo)

summary(model1_log)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: log(value + 1e-04) ~ vital_sign * gcs_cat + iss_cat + invasive +  
##     sex + age_cat + bleeding + fracture + concussion + brain_edema +  
##     brain_compression + unconsciousness + (1 | research_case_id)
##    Data: poly_hemo
## 
## REML criterion at convergence: 2414.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -44.258  -0.401   0.023   0.414  15.633 
## 
## Random effects:
##  Groups           Name        Variance Std.Dev.
##  research_case_id (Intercept) 0.03188  0.1786  
##  Residual                     0.04380  0.2093  
## Number of obs: 14150, groups:  research_case_id, 7075
## 
## Fixed effects:
##                          Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)             4.495e+00  9.443e-03  8.834e+03 475.960  < 2e-16 ***
## vital_signhr           -1.218e-01  6.366e-03  7.071e+03 -19.133  < 2e-16 ***
## gcs_cat.L              -1.047e-01  1.160e-02  1.085e+04  -9.024  < 2e-16 ***
## gcs_cat.Q              -5.650e-02  1.216e-02  1.172e+04  -4.647 3.41e-06 ***
## gcs_cat.C              -1.234e-02  1.334e-02  1.191e+04  -0.925 0.354787    
## iss_cat.L              -5.057e-02  1.247e-02  7.054e+03  -4.055 5.07e-05 ***
## iss_cat.Q              -4.935e-02  1.005e-02  7.054e+03  -4.910 9.32e-07 ***
## iss_cat.C              -1.096e-02  7.287e-03  7.054e+03  -1.504 0.132538    
## invasive1               2.580e-02  1.433e-02  7.054e+03   1.801 0.071732 .  
## sexm                    4.892e-03  5.972e-03  7.054e+03   0.819 0.412767    
## age_cat.L               3.846e-02  7.281e-03  7.054e+03   5.282 1.32e-07 ***
## age_cat.Q              -5.247e-03  7.161e-03  7.054e+03  -0.733 0.463689    
## age_cat.C               6.182e-03  7.433e-03  7.054e+03   0.832 0.405580    
## age_cat^4               6.874e-03  7.549e-03  7.054e+03   0.911 0.362537    
## age_cat^5               8.666e-03  7.900e-03  7.054e+03   1.097 0.272688    
## age_cat^6              -2.900e-03  7.838e-03  7.054e+03  -0.370 0.711390    
## bleeding               -9.045e-03  1.003e-02  7.054e+03  -0.902 0.367156    
## fracture               -8.448e-04  9.303e-03  7.054e+03  -0.091 0.927648    
## concussion              1.662e-03  7.516e-03  7.054e+03   0.221 0.824960    
## brain_edema             4.868e-02  4.218e-02  7.054e+03   1.154 0.248452    
## brain_compression       4.291e-02  5.598e-02  7.054e+03   0.767 0.443403    
## unconsciousness        -1.281e-02  7.730e-03  7.054e+03  -1.657 0.097498 .  
## vital_signhr:gcs_cat.L  1.688e-01  1.103e-02  7.071e+03  15.314  < 2e-16 ***
## vital_signhr:gcs_cat.Q  4.796e-02  1.273e-02  7.071e+03   3.766 0.000167 ***
## vital_signhr:gcs_cat.C -7.866e-04  1.424e-02  7.071e+03  -0.055 0.955938    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(model1, model1_log)
## Data: poly_hemo
## Models:
## model1: value ~ vital_sign * gcs_cat + iss_cat + invasive + sex + age_cat + bleeding + fracture + concussion + brain_edema + brain_compression + unconsciousness + (1 | research_case_id)
## model1_log: log(value + 1e-04) ~ vital_sign * gcs_cat + iss_cat + invasive + sex + age_cat + bleeding + fracture + concussion + brain_edema + brain_compression + unconsciousness + (1 | research_case_id)
##            npar    AIC    BIC logLik deviance  Chisq Df Pr(>Chisq)
## model1       27 119615 119819 -59780   119561                     
## model1_log   27   2278   2482  -1112     2224 117337  0
# Some Diagnostics
plot(model1_log)

qqnorm(resid(model1_log), main="Q-Q Plot of Residuals")
qqline(resid(model1_log))

# Half-normal plot of residuals (approximation using car package)
car::qqPlot(resid(model1), distribution="norm", envelope=0.95,
           main="Half-Normal Plot of Residuals", ylab="Residuals")

## [1] 3489 4990
# Predictions
poly_hemo$predicted_log <- predict(model1_log, newdata = poly_hemo, allow.new.levels = TRUE, re.form = NULL)  # Include random effects

poly_hemo$predicted_log_trafo <- exp(poly_hemo$predicted_log) - 0.0001

# Plot predicted vs observed
ggplot(poly_hemo, aes(x=value, y=predicted_log_trafo)) +
  geom_point(alpha=0.5) +
  geom_abline(intercept=0, slope=1, color="red") +
  facet_wrap(~vital_sign, scales="fixed") +
  labs(title="Predicted vs Observed Values", x="Observed", y="Predicted") +
  theme_minimal()

Changes in interpretation: * Sex & unconsciousness is not significant anymore * ISS not only quadratic barely significant but both linear and quadratic highly significant (negative), which is to be expected

At least to me, qqplot and predictions look better with a log trafo!

However, we can see a few outliers. What worries me is that they did not show before the log trafo. We can have a look at them:

# TODO

# wrong ones before

Model 2: Temperature as Outcome

Note that since we only have temperature (single outcome variable; continuous) and no multivariate case here, there’s no point in fitting a lmer() model, since it’s not sensible to have random effects for research_case_id (here, they are unique!).

poly_temp <- poly %>% drop_na(temperature)

qqnorm(poly_temp$temperature, main="Q-Q Plot for Temperature")
qqline(poly_temp$temperature)

model2 <- lm(temperature ~ gcs_cat + iss_cat + invasive + sex + age_cat + 
             bleeding + fracture + concussion + brain_edema + brain_compression +
             unconsciousness, data = poly_temp)

summary(model2)
## 
## Call:
## lm(formula = temperature ~ gcs_cat + iss_cat + invasive + sex + 
##     age_cat + bleeding + fracture + concussion + brain_edema + 
##     brain_compression + unconsciousness, data = poly_temp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.8229  -0.3107   0.0125   0.3595   8.0535 
## 
## Coefficients:
##                    Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)       36.280295   0.028738 1262.469  < 2e-16 ***
## gcs_cat.L         -0.705540   0.033036  -21.357  < 2e-16 ***
## gcs_cat.Q         -0.212251   0.033377   -6.359 2.17e-10 ***
## gcs_cat.C         -0.055648   0.036230   -1.536 0.124599    
## iss_cat.L         -0.339735   0.040948   -8.297  < 2e-16 ***
## iss_cat.Q         -0.146951   0.032825   -4.477 7.71e-06 ***
## iss_cat.C         -0.021197   0.023543   -0.900 0.367966    
## invasive1         -0.012699   0.046213   -0.275 0.783492    
## sexm               0.014717   0.018893    0.779 0.436018    
## age_cat.L         -0.089488   0.023037   -3.885 0.000104 ***
## age_cat.Q          0.018338   0.022703    0.808 0.419278    
## age_cat.C          0.025538   0.023456    1.089 0.276306    
## age_cat^4         -0.002723   0.023835   -0.114 0.909035    
## age_cat^5         -0.003950   0.024935   -0.158 0.874144    
## age_cat^6         -0.003952   0.024819   -0.159 0.873498    
## bleeding           0.152813   0.031899    4.791 1.70e-06 ***
## fracture           0.010354   0.029658    0.349 0.727021    
## concussion         0.067613   0.023877    2.832 0.004645 ** 
## brain_edema       -0.278902   0.138117   -2.019 0.043496 *  
## brain_compression -0.132814   0.191376   -0.694 0.487711    
## unconsciousness    0.010731   0.024695    0.435 0.663914    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7001 on 6436 degrees of freedom
## Multiple R-squared:  0.1145, Adjusted R-squared:  0.1118 
## F-statistic: 41.61 on 20 and 6436 DF,  p-value: < 2.2e-16
# Figure out what to do with NA --> make one model with removed
# --> one model with categories

Interpretation:

Let’s look at some diagnostics:

par(mfrow=c(2,2))
plot(model2)

par(mfrow=c(1,1))

# Half-normal plot of residuals
car::qqPlot(resid(model2), distribution="norm", envelope=0.95,
           main="Half-Normal Plot of Residuals", ylab="Residuals")

## [1] 1745 4149
# Predictions
poly_temp$temp_predicted <- predict(model2, poly_temp)

# Plot predicted vs observed
ggplot(poly_temp, aes(x=temperature, y=temp_predicted)) +
  geom_point(alpha=0.5) +
  geom_abline(intercept=0, slope=1, color="red") +
  labs(title="Predicted vs Observed Temperature", x="Observed", y="Predicted") +
  theme_minimal()

I don’t know, doesn’t look so good.

Just look at effects:

# Examine effects of ordered factors
ggplot(poly_temp, aes(x=gcs_cat, y=temperature)) +
  geom_boxplot() +
  labs(title="Temperature by GCS Category", x="GCS Category", y="Temperature") +
  theme_minimal()

ggplot(poly_temp, aes(x=iss_cat, y=temperature)) +
  geom_boxplot() +
  labs(title="Temperature by ISS Category", x="ISS Category", y="Temperature") +
  theme_minimal()

ggplot(poly_temp, aes(x=age_cat, y=temperature)) +
  geom_boxplot() +
  labs(title="Temperature by Age Category", x="Age Category", y="Temperature") +
  theme_minimal()

Model 2.2: Temperature as Outcome (but w/ ordered factors)

Model 3: INR as Outcome

Same thing as above; just use lm.

poly_inr <- poly %>% drop_na(inr)

model3 <- lm(inr ~ gcs_cat + iss_cat + invasive + sex + age_cat + 
             bleeding + fracture + concussion + brain_edema + brain_compression +
             unconsciousness, data = poly_inr)

summary(model3)
## 
## Call:
## lm(formula = inr ~ gcs_cat + iss_cat + invasive + sex + age_cat + 
##     bleeding + fracture + concussion + brain_edema + brain_compression + 
##     unconsciousness, data = poly_inr)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.6226 -0.1714 -0.0679  0.0201  8.4128 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        1.279481   0.018358  69.697  < 2e-16 ***
## gcs_cat.L          0.153019   0.021149   7.235 5.16e-13 ***
## gcs_cat.Q          0.020010   0.021501   0.931 0.352055    
## gcs_cat.C          0.021486   0.023452   0.916 0.359597    
## iss_cat.L          0.100529   0.025628   3.923 8.85e-05 ***
## iss_cat.Q          0.031382   0.020648   1.520 0.128599    
## iss_cat.C         -0.012946   0.015009  -0.863 0.388422    
## invasive1          0.050266   0.029318   1.715 0.086480 .  
## sexm               0.045608   0.012386   3.682 0.000233 ***
## age_cat.L          0.212063   0.015076  14.067  < 2e-16 ***
## age_cat.Q          0.126273   0.014819   8.521  < 2e-16 ***
## age_cat.C          0.008021   0.015371   0.522 0.601806    
## age_cat^4         -0.007263   0.015584  -0.466 0.641181    
## age_cat^5         -0.009699   0.016323  -0.594 0.552411    
## age_cat^6         -0.002581   0.016188  -0.159 0.873308    
## bleeding          -0.024893   0.020607  -1.208 0.227108    
## fracture          -0.042573   0.019150  -2.223 0.026239 *  
## concussion        -0.038310   0.015502  -2.471 0.013488 *  
## brain_edema        0.114685   0.088663   1.293 0.195888    
## brain_compression  0.169688   0.115377   1.471 0.141413    
## unconsciousness   -0.035385   0.015939  -2.220 0.026449 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4664 on 6674 degrees of freedom
## Multiple R-squared:  0.06086,    Adjusted R-squared:  0.05805 
## F-statistic: 21.62 on 20 and 6674 DF,  p-value: < 2.2e-16

Let’s look at some diagnostics:

par(mfrow=c(2,2))
plot(model3)

par(mfrow=c(1,1))

# Examine relationship between INR and ordered factors
ggplot(poly, aes(x=gcs_cat, y=inr)) +
  geom_boxplot() +
  labs(title="INR by GCS Category", x="GCS Category", y="INR") +
  theme_minimal()

summary(poly$inr)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    0.80    1.10    1.10    1.22    1.20   10.00     380
boxplot(poly$inr)

boxplot(log(poly$inr))

Very bad. If we look at the last plots it’s no wonder, a lot of outliers or very high values distorting it. Maybe approach with ordered factors can help.

Try with a log trafo:

model3_log <- lm(log(inr + 0.0001) ~ gcs_cat + iss_cat + invasive + sex + age_cat + 
                 bleeding + fracture + concussion + brain_edema + brain_compression +
                 unconsciousness, data = poly_inr)

summary(model3_log)
## 
## Call:
## lm(formula = log(inr + 1e-04) ~ gcs_cat + iss_cat + invasive + 
##     sex + age_cat + bleeding + fracture + concussion + brain_edema + 
##     brain_compression + unconsciousness, data = poly_inr)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.40702 -0.11179 -0.03358  0.02994  2.02435 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        0.202474   0.008772  23.081  < 2e-16 ***
## gcs_cat.L          0.099329   0.010106   9.829  < 2e-16 ***
## gcs_cat.Q          0.011733   0.010274   1.142 0.253497    
## gcs_cat.C          0.012231   0.011206   1.091 0.275125    
## iss_cat.L          0.066232   0.012246   5.408 6.59e-08 ***
## iss_cat.Q          0.013092   0.009867   1.327 0.184618    
## iss_cat.C         -0.010313   0.007172  -1.438 0.150484    
## invasive1          0.029004   0.014010   2.070 0.038466 *  
## sexm               0.031779   0.005919   5.369 8.18e-08 ***
## age_cat.L          0.111466   0.007204  15.473  < 2e-16 ***
## age_cat.Q          0.076526   0.007081  10.807  < 2e-16 ***
## age_cat.C          0.005911   0.007345   0.805 0.420985    
## age_cat^4         -0.002012   0.007447  -0.270 0.787051    
## age_cat^5         -0.006708   0.007800  -0.860 0.389862    
## age_cat^6         -0.003508   0.007736  -0.454 0.650184    
## bleeding          -0.023573   0.009847  -2.394 0.016701 *  
## fracture          -0.021178   0.009151  -2.314 0.020678 *  
## concussion        -0.026092   0.007408  -3.522 0.000431 ***
## brain_edema        0.094174   0.042368   2.223 0.026266 *  
## brain_compression  0.089951   0.055133   1.632 0.102828    
## unconsciousness   -0.023403   0.007616  -3.073 0.002130 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2229 on 6674 degrees of freedom
## Multiple R-squared:  0.08771,    Adjusted R-squared:  0.08498 
## F-statistic: 32.08 on 20 and 6674 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2))
plot(model3_log)

par(mfrow=c(1,1))

Same result. Helps a bit, but not nearly enough.